<!DOCTYPE html>
<html lang="en">

<head>
  <meta charset="utf-8">
  <meta name="description"
    content="Medical SAM 2: Segment Medical Images as Video via Segment Anything Model 2">
  <meta name="keywords" content="Medical-SAM2, Medical, SAM, Segmentation, Image, Video">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Medical SAM 2: Segment Medical Images as Video via Segment Anything Model 2</title>
  <!-- Global site tag (gtag.js) - Google Analytics -->
  <script async src="https://www.googletagmanager.com/gtag/js?id=G-PYVRSFMDRL"></script>
  <script>
    // Analytics bootstrap: commands are queued on window.dataLayer.
    // Reuse an existing dataLayer if one is already defined; otherwise start with an empty array.
    window.dataLayer = window.dataLayer || [];

    // Pushes this call's `arguments` object itself (not a copied array) onto dataLayer.
    // NOTE(review): presumably gtag.js relies on receiving the raw arguments object; do not "modernise" to rest parameters without confirming.
    function gtag() {
      dataLayer.push(arguments);
    }

    // Queue a 'js' command carrying the current timestamp.
    gtag('js', new Date());

    // Queue a 'config' command for the same ID that appears in the gtag.js loader URL above.
    gtag('config', 'G-PYVRSFMDRL');
  </script>

  <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro" rel="stylesheet">

  <link rel="stylesheet" href="./static/css/bulma.min.css">
  <link rel="stylesheet" href="./static/css/bulma-carousel.min.css">
  <link rel="stylesheet" href="./static/css/bulma-slider.min.css">
  <link rel="stylesheet" href="./static/css/fontawesome.all.min.css">
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/jpswalsh/academicons@1/css/academicons.min.css">
  <link rel="stylesheet" href="./static/css/index.css">
  <link rel="icon" href="./static/images/favicon.svg">

  <script src="https://ajax.googleapis.com/ajax/libs/jquery/3.5.1/jquery.min.js"></script>
  <script defer src="./static/js/fontawesome.all.min.js"></script>
  <script src="./static/js/bulma-carousel.min.js"></script>
  <script src="./static/js/bulma-slider.min.js"></script>
  <script src="./static/js/index.js"></script>

</head>
<body>

  <section class="hero">
    <div class="hero-body">
      <div class="container is-max-desktop">
        <div class="columns is-centered">
          <div class="column has-text-centered">
            <h1 class="title is-1 publication-title">Medical SAM 2: Segment Medical Images as Video via Segment Anything Model 2
            </h1>
            <!-- Author list. Each author is wrapped in its own author-block span;
                 the superscript numbers index the affiliations listed in the next block. -->
            <div class="is-size-5 publication-authors">
              <span class="author-block">
                Jiayuan Zhu<sup>1</sup>,</span>
              <span class="author-block">
                <a href="https://abdullahamdi.com/">Abdullah Hamdi</a><sup>1</sup>,</span>
              <span class="author-block">
                Yunli Qi<sup>1</sup>,</span>
              <span class="author-block">
                Yueming Jin<sup>2</sup>,</span>
              <span class="author-block">
                Junde Wu<sup>1</sup></span>
            </div>

            <div class="is-size-5 publication-authors">
              <span class="author-block"><sup>1</sup>University of Oxford</span>
              <span class="author-block"><sup>2</sup>National University of Singapore</span>
            </div>
            <div class="column has-text-centered">
              <div class="publication-links">
                <!-- PDF Link. -->
                <span class="link-block">
                  <a href="./static/assets/MedicalSam2.pdf"
                    class="external-link button is-normal is-rounded is-dark">
                    <span class="icon">
                      <svg class="svg-inline--fa fa-file-pdf fa-w-12" aria-hidden="true" focusable="false"
                        data-prefix="fas" data-icon="file-pdf" role="img" xmlns="http://www.w3.org/2000/svg"
                        viewBox="0 0 384 512" data-fa-i2svg="">
                        <path fill="currentColor"
                          d="M181.9 256.1c-5-16-4.9-46.9-2-46.9 8.4 0 7.6 36.9 2 46.9zm-1.7 47.2c-7.7 20.2-17.3 43.3-28.4 62.7 18.3-7 39-17.2 62.9-21.9-12.7-9.6-24.9-23.4-34.5-40.8zM86.1 428.1c0 .8 13.2-5.4 34.9-40.2-6.7 6.3-29.1 24.5-34.9 40.2zM248 160h136v328c0 13.3-10.7 24-24 24H24c-13.3 0-24-10.7-24-24V24C0 10.7 10.7 0 24 0h200v136c0 13.2 10.8 24 24 24zm-8 171.8c-20-12.2-33.3-29-42.7-53.8 4.5-18.5 11.6-46.6 6.2-64.2-4.7-29.4-42.4-26.5-47.8-6.8-5 18.3-.4 44.1 8.1 77-11.6 27.6-28.7 64.6-40.8 85.8-.1 0-.1.1-.2.1-27.1 13.9-73.6 44.5-54.5 68 5.6 6.9 16 10 21.5 10 17.9 0 35.7-18 61.1-61.8 25.8-8.5 54.1-19.1 79-23.2 21.7 11.8 47.1 19.5 64 19.5 29.2 0 31.2-32 19.7-43.4-13.9-13.6-54.3-9.7-73.6-7.2zM377 105L279 7c-4.5-4.5-10.6-7-17-7h-6v128h128v-6.1c0-6.3-2.5-12.4-7-16.9zm-74.1 255.3c4.1-2.7-2.5-11.9-42.8-9 37.1 15.8 42.8 9 42.8 9z">
                        </path>
                      </svg><!-- <i class="fas fa-file-pdf"></i> Font Awesome fontawesome.com -->
                    </span>
                    <span>Paper</span>
                  </a>
                </span>
                <span class="link-block">
                  <a href="https://arxiv.org/abs/2408.00874" class="external-link button is-normal is-rounded is-dark">
                    <span class="icon">
                      <i class="ai ai-arxiv"></i>
                    </span>
                    <span>arXiv</span>
                  </a>
                </span>
                <!-- Video Link. -->
                <span class="link-block">
                  <a href="https://youtu.be/yi0UiH1DhZA" class="external-link button is-normal is-rounded is-dark">
                    <span class="icon">
                      <svg class="svg-inline--fa fa-youtube fa-w-18" aria-hidden="true" focusable="false"
                        data-prefix="fab" data-icon="youtube" role="img" xmlns="http://www.w3.org/2000/svg"
                        viewBox="0 0 576 512" data-fa-i2svg="">
                        <path fill="currentColor"
                          d="M549.655 124.083c-6.281-23.65-24.787-42.276-48.284-48.597C458.781 64 288 64 288 64S117.22 64 74.629 75.486c-23.497 6.322-42.003 24.947-48.284 48.597-11.412 42.867-11.412 132.305-11.412 132.305s0 89.438 11.412 132.305c6.281 23.65 24.787 41.5 48.284 47.821C117.22 448 288 448 288 448s170.78 0 213.371-11.486c23.497-6.321 42.003-24.171 48.284-47.821 11.412-42.867 11.412-132.305 11.412-132.305s0-89.438-11.412-132.305zm-317.51 213.508V175.185l142.739 81.205-142.739 81.201z">
                        </path>
                      </svg><!-- <i class="fab fa-youtube"></i> Font Awesome fontawesome.com -->
                    </span>
                    <span>Video</span>
                  </a>
                </span>


              <!-- Code Link. -->
              <span class="link-block">
                <a href="https://github.com/SuperMedIntel/Medical-SAM2" class="external-link button is-normal is-rounded is-dark">
                  <span class="icon">
                    <i class="fab fa-github"></i>
                  </span>
                  <span>Code</span>
                </a>
              </span>
              </div>
            </div>
          </div>
        </div>
      </div>
    </div>
  </section>

  <!-- Teaser: headline figure followed by a one-paragraph summary of the method's capability. -->
  <section class="hero teaser">
    <div class="container is-max-desktop">
      <div class="hero-body">
        <div style="display: flex; justify-content: center;">
          <!-- NOTE(review): alt text was written without seeing the image; refine it to describe the actual figure. -->
          <img src="./static/assets/images/facial.png" alt="MedSAM-2 teaser figure" width="500" height="200">
        </div>
        <br>
        <h2 class="subtitle has-text-centered">
          When provided with a prompt in one 3D slice, MedSAM-2 can segment all later spatial-temporal 3D frames. When given a prompt in one 2D image, MedSAM-2 can accurately segment other 2D images that are not temporally related using the same criteria, which is an emergence of One-prompt Segmentation capability.
        </h2>
      </div>
    </div>
  </section>



  <section class="section">
    <div class="container is-max-desktop">
      <!-- Abstract. -->
      <div class="columns is-centered has-text-centered">
        <div class="column is-four-fifths">
          <h2 class="title is-3">Abstract</h2>
          <div class="content has-text-justified">
            Medical image segmentation plays a pivotal role in clinical diagnostics and treatment planning, 
            yet existing models often face challenges in generalization and in handling both 2D and 3D data 
            uniformly. In this paper, we introduce Medical SAM 2 (<b>MedSAM-2</b>), a generalized auto-tracking 
            model for universal 2D and 3D medical image segmentation. The core concept is to leverage the 
            Segment Anything Model 2 (<a href="https://arxiv.org/abs/2408.00714">SAM2</a>) pipeline to treat 
            all 2D and 3D medical segmentation tasks as a video object tracking problem. To put it into 
            practice, we propose a novel <i>self-sorting memory bank</i> mechanism that dynamically selects 
            informative embeddings based on confidence and dissimilarity, regardless of temporal order. 
            This mechanism not only significantly improves performance in 3D medical image segmentation but 
            also unlocks a <i>One-Prompt Segmentation</i> capability for 2D images, allowing segmentation 
            across multiple images from a single prompt without temporal relationships. We evaluated 
            MedSAM-2 on five 2D tasks and nine 3D tasks, including white blood cells, optic cups, 
            retinal vessels, mandibles, coronary arteries, kidney tumors, liver tumors, breast cancer, 
            nasopharynx cancer, vestibular schwannoma, mediastinal lymph nodules, cerebral artery, 
            inferior alveolar nerve, and abdominal organs, comparing it against state-of-the-art (SOTA) 
            models in task-tailored, general and interactive segmentation settings. Our findings demonstrate 
            that MedSAM-2 surpasses a wide range of existing models and updates new SOTA on several benchmarks. 

          </div>
        </div>
      </div>
      <!--/ Abstract. -->
      <!-- Paper video. -->
      <div class="columns is-centered has-text-centered">
        <div class="column is-four-fifths">
          <h2 class="title is-3">Video</h2>
          <div class="publication-video"> 
          <iframe width="560" height="315" src="https://www.youtube.com/embed/yi0UiH1DhZA" title="YouTube video player"
            frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture"
            allowfullscreen></iframe>
          </div>
        </div>
      </div>
      <!--/ Paper video. -->
    </div>
  </section>



  <!-- Framework overview: architecture figure followed by its caption. -->
  <section class="section">
    <div class="container is-max-desktop">
      <h3 class="title is-4">MedSAM-2 Framework</h3>
      <div class="content has-text-centered">
        <img src="./static/assets/images/framework.png" alt="Diagram of the MedSAM-2 framework">
        Building on the SAM2 framework, we propose treating 3D medical images and 2D medical image flows as
        videos to facilitate memory-enhanced medical image segmentation. This approach not only improves performance
        in 3D medical image segmentation but also unlocks One-Prompt Segmentation capability for 2D medical image
        flows. This is achieved by incorporating our proposed <b>Self-Sorting Memory Bank</b>, which selects the most
        confident embeddings based on the confidence predictions (&alpha;, &beta;, &gamma;) from the mask decoder.
      </div>
    </div>
  </section>


    <!-- 3D results: quantitative comparison figure, then a qualitative visualization, each followed by its caption. -->
    <section class="section">
      <div class="container is-max-desktop">
        <h3 class="title is-4">3D Medical Images Segmentation Performance &amp; Visualization</h3>
        <div class="content has-text-centered">
          <img src="./static/assets/images/3D_result.png" width="100%"
            alt="Dice score comparison of MedSAM-2 against other segmentation models on 3D tasks">
          We show the comparison of MedSAM-2 with task-tailored models, interactive generalized models,
          and auto-tracking generalized models. Evaluated on 11 unseen tasks by Dice Score (%).
          <img src="./static/assets/images/3D_vis.png" width="100%"
            alt="Visual comparison of MedSAM, MedSAM-2, and ground-truth segmentations on the BTCV dataset">
          We show comparison of MedSAM, our MedSAM-2, and ground truth on sequential 3D medical image segmentation
          on the BTCV dataset. Note how our MedSAM-2 produces more consistent 3D predictions leveraging the 3D
          context and maintaining high generalization capability compared to MedSAM.
        </div>
      </div>
    </section>

    <!-- 2D results: quantitative comparison figure, then example segmentations, each followed by its caption.
         The section element must wrap the container so the content receives the section spacing. -->
    <section class="section">
      <div class="container is-max-desktop">
        <h3 class="title is-4">2D Medical Images Segmentation Performance &amp; Visualization</h3>
        <div class="content has-text-centered">
          <img src="./static/assets/images/2D_result.png" width="100%"
            alt="Dice score comparison of MedSAM-2 against other segmentation methods on 2D tasks">
          We show the comparison of MedSAM-2 with SOTA segmentation methods over BTCV dataset evaluated by Dice
          Score (%). Task-tailored models, interactive generalized models, auto-tracking generalized models
          are marked in yellow, green, blue.
          <img src="./static/assets/images/2D_vis.png" width="100%"
            alt="Example 2D segmentation results on diverse datasets">
          We show several examples of 2D segmentation on diverse datasets.
        </div>
      </div>
    </section>

  <!-- Citation block. Whitespace inside <pre> is rendered verbatim, so the BibTeX entry is kept
       flush-left with a uniform two-space field indent and no trailing blank line. -->
  <section class="section" id="BibTeX">
    <div class="container is-max-desktop content">
      <h2 class="title">BibTeX</h2>
      <pre><code>@misc{zhu_medical_2024,
  title={Medical SAM 2: Segment medical images as video via Segment Anything Model 2},
  author={Jiayuan Zhu and Abdullah Hamdi and Yunli Qi and Yueming Jin and Junde Wu},
  year={2024},
  eprint={2408.00874},
  archivePrefix={arXiv},
  primaryClass={cs.CV}
}</code></pre>
    </div>
  </section>





  <footer class="footer">
    <div class="container">
      <div class="content has-text-centered">
        <a class="icon-link" href="./static/assets/MedicalSam2.pdf" aria-label="Paper (PDF)">
          <svg class="svg-inline--fa fa-file-pdf fa-w-12" aria-hidden="true" focusable="false" data-prefix="fas"
            data-icon="file-pdf" role="img" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 384 512" data-fa-i2svg="">
            <path fill="currentColor"
              d="M181.9 256.1c-5-16-4.9-46.9-2-46.9 8.4 0 7.6 36.9 2 46.9zm-1.7 47.2c-7.7 20.2-17.3 43.3-28.4 62.7 18.3-7 39-17.2 62.9-21.9-12.7-9.6-24.9-23.4-34.5-40.8zM86.1 428.1c0 .8 13.2-5.4 34.9-40.2-6.7 6.3-29.1 24.5-34.9 40.2zM248 160h136v328c0 13.3-10.7 24-24 24H24c-13.3 0-24-10.7-24-24V24C0 10.7 10.7 0 24 0h200v136c0 13.2 10.8 24 24 24zm-8 171.8c-20-12.2-33.3-29-42.7-53.8 4.5-18.5 11.6-46.6 6.2-64.2-4.7-29.4-42.4-26.5-47.8-6.8-5 18.3-.4 44.1 8.1 77-11.6 27.6-28.7 64.6-40.8 85.8-.1 0-.1.1-.2.1-27.1 13.9-73.6 44.5-54.5 68 5.6 6.9 16 10 21.5 10 17.9 0 35.7-18 61.1-61.8 25.8-8.5 54.1-19.1 79-23.2 21.7 11.8 47.1 19.5 64 19.5 29.2 0 31.2-32 19.7-43.4-13.9-13.6-54.3-9.7-73.6-7.2zM377 105L279 7c-4.5-4.5-10.6-7-17-7h-6v128h128v-6.1c0-6.3-2.5-12.4-7-16.9zm-74.1 255.3c4.1-2.7-2.5-11.9-42.8-9 37.1 15.8 42.8 9 42.8 9z">
            </path>
          </svg><!-- <i class="fas fa-file-pdf"></i> Font Awesome fontawesome.com -->
        </a>
        <a class="icon-link" href="https://github.com/jiayuanz3" aria-label="GitHub profile">
          <svg class="svg-inline--fa fa-github fa-w-16" aria-hidden="true" focusable="false" data-prefix="fab"
            data-icon="github" role="img" xmlns="http://www.w3.org/2000/svg" viewBox="0 0 496 512" data-fa-i2svg="">
            <path fill="currentColor"
              d="M165.9 397.4c0 2-2.3 3.6-5.2 3.6-3.3.3-5.6-1.3-5.6-3.6 0-2 2.3-3.6 5.2-3.6 3-.3 5.6 1.3 5.6 3.6zm-31.1-4.5c-.7 2 1.3 4.3 4.3 4.9 2.6 1 5.6 0 6.2-2s-1.3-4.3-4.3-5.2c-2.6-.7-5.5.3-6.2 2.3zm44.2-1.7c-2.9.7-4.9 2.6-4.6 4.9.3 2 2.9 3.3 5.9 2.6 2.9-.7 4.9-2.6 4.6-4.6-.3-1.9-3-3.2-5.9-2.9zM244.8 8C106.1 8 0 113.3 0 252c0 110.9 69.8 205.8 169.5 239.2 12.8 2.3 17.3-5.6 17.3-12.1 0-6.2-.3-40.4-.3-61.4 0 0-70 15-84.7-29.8 0 0-11.4-29.1-27.8-36.6 0 0-22.9-15.7 1.6-15.4 0 0 24.9 2 38.6 25.8 21.9 38.6 58.6 27.5 72.9 20.9 2.3-16 8.8-27.1 16-33.7-55.9-6.2-112.3-14.3-112.3-110.5 0-27.5 7.6-41.3 23.6-58.9-2.6-6.5-11.1-33.3 2.6-67.9 20.9-6.5 69 27 69 27 20-5.6 41.5-8.5 62.8-8.5s42.8 2.9 62.8 8.5c0 0 48.1-33.6 69-27 13.7 34.7 5.2 61.4 2.6 67.9 16 17.7 25.8 31.5 25.8 58.9 0 96.5-58.9 104.2-114.8 110.5 9.2 7.9 17 22.9 17 46.4 0 33.7-.3 75.4-.3 83.6 0 6.5 4.6 14.4 17.3 12.1C428.2 457.8 496 362.9 496 252 496 113.3 383.5 8 244.8 8zM97.2 352.9c-1.3 1-1 3.3.7 5.2 1.6 1.6 3.9 2.3 5.2 1 1.3-1 1-3.3-.7-5.2-1.6-1.6-3.9-2.3-5.2-1zm-10.8-8.1c-.7 1.3.3 2.9 2.3 3.9 1.6 1 3.6.7 4.3-.7.7-1.3-.3-2.9-2.3-3.9-2-.6-3.6-.3-4.3.7zm32.4 35.6c-1.6 1.3-1 4.3 1.3 6.2 2.3 2.3 5.2 2.6 6.5 1 1.3-1.3.7-4.3-1.3-6.2-2.2-2.3-5.2-2.6-6.5-1zm-11.4-14.7c-1.6 1-1.6 3.6 0 5.9 1.6 2.3 4.3 3.3 5.6 2.3 1.6-1.3 1.6-3.9 0-6.2-1.4-2.3-4-3.3-5.6-2z">
            </path>
          </svg><!-- <i class="fab fa-github"></i> Font Awesome fontawesome.com -->
        </a>
      </div>
      <!-- Footer credits: template attribution and contact address. -->
      <div class="columns is-centered">
        <div class="column is-8">
          <div class="content">
            <p class="has-text-centered">
              Source code mainly borrowed from Abdullah Hamdi's <a
                href="https://abdullahamdi.com/">previous projects</a>.
            </p>
            <p class="has-text-centered">
              Please contact <a href="mailto:jiayuan.zhu@ieee.org">Jiayuan Zhu</a> for feedback and questions.
            </p>
          </div>
        </div>
      </div>
    </div>
  </footer>




</body>

</html>